In [1]:
# Load required libraries
library(syuzhet)
library(dplyr)
library(tidyr)
library(readr)
library(plotly)
# ---- Load the data ----
# Import the comments CSV (the path is relative to the working directory;
# adjust it as needed). The text to score is taken from the `Comments` column.
data <- read_csv("fds dst.csv")

# ---- Score the comments against the NRC lexicon ----
# The result has one row per comment, with a numeric column for each of the
# eight emotions plus `negative` and `positive`.
nrc_sentiment <- get_nrc_sentiment(data$Comments)

# Sanity check: inspect the score columns that came back.
str(nrc_sentiment)

# ---- Attach the scores to the source rows ----
results <- cbind(data, nrc_sentiment)

# Sanity check: the emotion columns should now sit alongside the originals.
print(names(results))

# ---- Total each of the eight emotions over all comments ----
# `negative` and `positive` are left out; only the eight emotions are summed.
emotion_summary <- summarise(
  results,
  across(
    all_of(c(
      "anger", "anticipation", "disgust", "fear",
      "joy", "sadness", "surprise", "trust"
    )),
    sum
  )
)

# Sanity check: show the one-row table of totals.
print(emotion_summary)

# ---- Reshape to long form for plotting ----
# One row per emotion: its name in `emotion`, its total in `score`.
emotion_long <- pivot_longer(
  emotion_summary,
  cols = everything(),
  names_to = "emotion",
  values_to = "score"
)

# ---- Interactive bar chart of the emotion totals ----
# Coral bars with a dark-blue outline; hovering shows only the total score.
p <- plot_ly(
  emotion_long,
  x = ~emotion,
  y = ~score,
  type = "bar",
  text = ~score,
  hoverinfo = "text",
  marker = list(
    color = "coral",
    line = list(color = "rgba(8,48,107,1.0)", width = 1.5)
  )
)

# Titles for the chart and both axes; a single trace needs no legend.
p <- layout(
  p,
  title = "Emotion Analysis: Eight Emotions",
  xaxis = list(title = "Emotion Category"),
  yaxis = list(title = "Total Score"),
  showlegend = FALSE
)

# Render the interactive plot as the cell's output.
p
Warning message:
"package 'syuzhet' was built under R version 4.4.2"
Warning message:
"package 'dplyr' was built under R version 4.4.2"
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Warning message:
"package 'tidyr' was built under R version 4.4.2"
Warning message:
"package 'readr' was built under R version 4.4.2"
Warning message:
"package 'plotly' was built under R version 4.4.2"
Loading required package: ggplot2
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
Rows: 3271 Columns: 5
── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): Platform, Comments, Random Date
dbl (2): 1, Label
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
'data.frame': 3271 obs. of 10 variables: $ anger : num 0 0 1 0 0 0 0 0 0 0 ... $ anticipation: num 2 2 1 1 1 2 1 1 0 2 ... $ disgust : num 0 0 1 0 0 0 0 0 0 0 ... $ fear : num 0 0 1 0 0 0 0 0 0 0 ... $ joy : num 1 2 0 0 2 0 0 2 0 0 ... $ sadness : num 0 0 1 1 0 0 0 1 0 0 ... $ surprise : num 1 1 0 0 1 0 0 1 0 0 ... $ trust : num 2 2 2 1 1 0 1 1 0 2 ... $ negative : num 0 0 1 0 0 0 0 1 0 1 ... $ positive : num 2 3 3 3 2 0 2 2 2 1 ... [1] "1" "Platform" "Comments" "Label" "Random Date" [6] "anger" "anticipation" "disgust" "fear" "joy" [11] "sadness" "surprise" "trust" "negative" "positive" anger anticipation disgust fear joy sadness surprise trust 1 482 1983 401 777 2148 795 859 3121
In [ ]: